1 /** 2 * Hash Set 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 8 module containers.hashset; 9 10 private import containers.internal.hash : generateHash, hashToIndex; 11 private import containers.internal.node : shouldAddGCRange; 12 private import std.experimental.allocator.mallocator : Mallocator; 13 private import std.traits : isBasicType; 14 15 /** 16 * Hash Set. 17 * Params: 18 * T = the element type 19 * Allocator = the allocator to use. Defaults to `Mallocator`. 20 * hashFunction = the hash function to use on the elements 21 * supportGC = true if the container should support holding references to 22 * GC-allocated memory. 23 */ 24 struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T, 25 bool supportGC = shouldAddGCRange!T, 26 bool storeHash = !isBasicType!T) 27 { 28 this(this) @disable; 29 30 private import std.experimental.allocator.common : stateSize; 31 32 static if (stateSize!Allocator != 0) 33 { 34 this() @disable; 35 36 /** 37 * Use the given `allocator` for allocations. 38 */ 39 this(Allocator allocator) 40 in 41 { 42 assert(allocator !is null, "Allocator must not be null"); 43 } 44 do 45 { 46 this.allocator = allocator; 47 } 48 49 /** 50 * Constructs a HashSet with an initial bucket count of bucketCount. 51 * bucketCount must be a power of two. 52 */ 53 this(size_t bucketCount, Allocator allocator) 54 in 55 { 56 assert(allocator !is null, "Allocator must not be null"); 57 assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 58 } 59 do 60 { 61 this.allocator = allocator; 62 initialize(bucketCount); 63 } 64 } 65 else 66 { 67 /** 68 * Constructs a HashSet with an initial bucket count of bucketCount. 69 * bucketCount must be a power of two. 70 */ 71 this(size_t bucketCount) 72 in 73 { 74 assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 75 } 76 do 77 { 78 initialize(bucketCount); 79 } 80 81 } 82 83 ~this() 84 { 85 import std.experimental.allocator : dispose; 86 import core.memory : GC; 87 static if (useGC) 88 GC.removeRange(buckets.ptr); 89 allocator.dispose(buckets); 90 } 91 92 /** 93 * Removes all items from the set 94 */ 95 void clear() 96 { 97 foreach (ref bucket; buckets) 98 { 99 destroy(bucket); 100 bucket = Bucket.init; 101 } 102 _length = 0; 103 } 104 105 /** 106 * Removes the given item from the set. 107 * Returns: false if the value was not present 108 */ 109 bool remove(T value) 110 { 111 if (buckets.length == 0) 112 return false; 113 immutable Hash hash = hashFunction(value); 114 immutable size_t index = hashToIndex(hash, buckets.length); 115 static if (storeHash) 116 immutable bool removed = buckets[index].remove(ItemNode(hash, value)); 117 else 118 immutable bool removed = buckets[index].remove(ItemNode(value)); 119 if (removed) 120 --_length; 121 return removed; 122 } 123 124 /** 125 * Returns: true if value is contained in the set. 126 */ 127 bool contains(T value) inout 128 { 129 return (value in this) !is null; 130 } 131 132 /** 133 * Supports $(B a in b) syntax 134 */ 135 inout(T)* opBinaryRight(string op)(T value) inout if (op == "in") 136 { 137 if (buckets.length == 0 || _length == 0) 138 return null; 139 immutable Hash hash = hashFunction(value); 140 immutable index = hashToIndex(hash, buckets.length); 141 return buckets[index].get(value, hash); 142 } 143 144 /** 145 * Inserts the given item into the set. 146 * Params: value = the value to insert 147 * Returns: true if the value was actually inserted, or false if it was 148 * already present. 149 */ 150 bool insert(T value) 151 { 152 if (buckets.length == 0) 153 initialize(4); 154 Hash hash = hashFunction(value); 155 immutable size_t index = hashToIndex(hash, buckets.length); 156 static if (storeHash) 157 auto r = buckets[index].insert(ItemNode(hash, value)); 158 else 159 auto r = buckets[index].insert(ItemNode(value)); 160 if (r) 161 ++_length; 162 if (shouldRehash) 163 rehash(); 164 return r; 165 } 166 167 /// ditto 168 bool opOpAssign(string op)(T item) if (op == "~") 169 { 170 return insert(item); 171 } 172 173 /// ditto 174 alias put = insert; 175 176 /// ditto 177 alias insertAnywhere = insert; 178 179 /** 180 * Returns: true if the set has no items 181 */ 182 bool empty() const nothrow pure @nogc @safe @property 183 { 184 return _length == 0; 185 } 186 187 /** 188 * Returns: the number of items in the set 189 */ 190 size_t length() const nothrow pure @nogc @safe @property 191 { 192 return _length; 193 } 194 195 /** 196 * Forward range interface 197 */ 198 auto opSlice(this This)() nothrow @nogc @trusted 199 { 200 return Range!(This)(&this); 201 } 202 203 private: 204 205 import containers.internal.element_type : ContainerElementType; 206 import containers.internal.mixins : AllocatorState; 207 import containers.internal.node : shouldAddGCRange, FatNodeInfo; 208 import containers.internal.storage_type : ContainerStorageType; 209 import std.traits : isPointer; 210 211 alias LengthType = ubyte; 212 alias N = FatNodeInfo!(ItemNode.sizeof, 1, 64, LengthType.sizeof); 213 enum ITEMS_PER_NODE = N[0]; 214 static assert(LengthType.max > ITEMS_PER_NODE); 215 enum bool useGC = supportGC && shouldAddGCRange!T; 216 alias Hash = typeof({ T v = void; return hashFunction(v); }()); 217 218 void initialize(size_t bucketCount) 219 { 220 import core.memory : GC; 221 import std.experimental.allocator : makeArray; 222 223 makeBuckets(bucketCount); 224 static if (useGC) 225 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 226 } 227 228 static struct Range(ThisT) 229 { 230 this(ThisT* t) 231 { 232 foreach (i, ref bucket; t.buckets) 233 { 234 bucketIndex = i; 235 if (bucket.root !is null) 236 { 237 currentNode = cast(Bucket.BucketNode*) bucket.root; 238 break; 239 } 240 } 241 this.t = t; 242 } 243 244 bool empty() const nothrow @safe @nogc @property 245 { 246 return currentNode is null; 247 } 248 249 ET front() nothrow @safe @nogc @property 250 { 251 return cast(ET) currentNode.items[nodeIndex].value; 252 } 253 254 void popFront() nothrow @trusted @nogc 255 { 256 if (nodeIndex + 1 < currentNode.l) 257 { 258 ++nodeIndex; 259 return; 260 } 261 else 262 { 263 nodeIndex = 0; 264 if (currentNode.next is null) 265 { 266 ++bucketIndex; 267 while (bucketIndex < t.buckets.length && t.buckets[bucketIndex].root is null) 268 ++bucketIndex; 269 if (bucketIndex < t.buckets.length) 270 currentNode = cast(Bucket.BucketNode*) t.buckets[bucketIndex].root; 271 else 272 currentNode = null; 273 } 274 else 275 { 276 currentNode = currentNode.next; 277 assert(currentNode.l > 0, "Empty node"); 278 } 279 } 280 } 281 282 private: 283 alias ET = ContainerElementType!(ThisT, T); 284 ThisT* t; 285 Bucket.BucketNode* currentNode; 286 size_t bucketIndex; 287 size_t nodeIndex; 288 } 289 290 void makeBuckets(size_t bucketCount) 291 { 292 import std.experimental.allocator : makeArray; 293 294 static if (stateSize!Allocator == 0) 295 buckets = allocator.makeArray!Bucket(bucketCount); 296 else 297 { 298 import std.conv:emplace; 299 300 buckets = cast(Bucket[]) allocator.allocate(Bucket.sizeof * bucketCount); 301 foreach (ref bucket; buckets) 302 emplace!Bucket(&bucket, allocator); 303 } 304 } 305 306 bool shouldRehash() const pure nothrow @safe @nogc 307 { 308 immutable float numberOfNodes = cast(float) _length / cast(float) ITEMS_PER_NODE; 309 return (numberOfNodes / cast(float) buckets.length) > 0.75f; 310 } 311 312 void rehash() @trusted 313 { 314 import std.experimental.allocator : makeArray, dispose; 315 import core.memory : GC; 316 317 immutable size_t newLength = buckets.length << 1; 318 Bucket[] oldBuckets = buckets; 319 makeBuckets(newLength); 320 assert (buckets, "Bucket reallocation failed"); 321 assert (buckets.length == newLength, "Bucket reallocation size mismatch"); 322 static if (useGC) 323 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 324 foreach (ref const bucket; oldBuckets) 325 { 326 for (Bucket.BucketNode* node = cast(Bucket.BucketNode*) bucket.root; node !is null; node = node.next) 327 { 328 for (size_t i = 0; i < node.l; ++i) 329 { 330 static if (storeHash) 331 { 332 immutable Hash hash = node.items[i].hash; 333 size_t index = hashToIndex(hash, buckets.length); 334 buckets[index].insert(ItemNode(hash, node.items[i].value)); 335 } 336 else 337 { 338 immutable Hash hash = hashFunction(node.items[i].value); 339 size_t index = hashToIndex(hash, buckets.length); 340 buckets[index].insert(ItemNode(node.items[i].value)); 341 } 342 } 343 } 344 } 345 static if (useGC) 346 GC.removeRange(oldBuckets.ptr); 347 allocator.dispose(oldBuckets); 348 } 349 350 static struct Bucket 351 { 352 this(this) @disable; 353 354 static if (stateSize!Allocator != 0) 355 { 356 this(Allocator allocator) 357 { 358 this.allocator = allocator; 359 } 360 this() @disable; 361 } 362 363 ~this() 364 { 365 import core.memory : GC; 366 import std.experimental.allocator : dispose; 367 368 BucketNode* current = root; 369 BucketNode* previous; 370 while (true) 371 { 372 if (previous !is null) 373 { 374 static if (useGC) 375 GC.removeRange(previous); 376 allocator.dispose(previous); 377 } 378 previous = current; 379 if (current is null) 380 break; 381 current = current.next; 382 } 383 } 384 385 static struct BucketNode 386 { 387 ContainerStorageType!(T)* get(ItemNode n) 388 { 389 foreach (ref item; items[0 .. l]) 390 { 391 static if (storeHash) 392 { 393 static if (isPointer!T) 394 { 395 if (item.hash == n.hash && *item.value == *n.value) 396 return &item.value; 397 } 398 else 399 { 400 if (item.hash == n.hash && item.value == n.value) 401 return &item.value; 402 } 403 } 404 else 405 { 406 static if (isPointer!T) 407 { 408 if (*item.value == *n.value) 409 return &item.value; 410 } 411 else 412 { 413 if (item.value == n.value) 414 return &item.value; 415 } 416 } 417 } 418 return null; 419 } 420 421 void insert(ItemNode n) 422 { 423 items[l] = n; 424 ++l; 425 } 426 427 bool remove(ItemNode n) 428 { 429 import std.algorithm : SwapStrategy, remove; 430 431 foreach (size_t i, ref node; items[0 .. l]) 432 { 433 static if (storeHash) 434 { 435 static if (isPointer!T) 436 immutable bool matches = node.hash == n.hash && *node.value == *n.value; 437 else 438 immutable bool matches = node.hash == n.hash && node.value == n.value; 439 } 440 else 441 { 442 static if (isPointer!T) 443 immutable bool matches = *node.value == *n.value; 444 else 445 immutable bool matches = node.value == n.value; 446 } 447 if (matches) 448 { 449 items[0 .. l].remove!(SwapStrategy.unstable)(i); 450 l--; 451 return true; 452 } 453 } 454 return false; 455 } 456 457 BucketNode* next; 458 LengthType l; 459 ItemNode[ITEMS_PER_NODE] items; 460 } 461 462 bool insert(ItemNode n) 463 { 464 import core.memory : GC; 465 import std.experimental.allocator : make; 466 467 BucketNode* hasSpace = null; 468 for (BucketNode* current = root; current !is null; current = current.next) 469 { 470 if (current.get(n) !is null) 471 return false; 472 if (current.l < current.items.length) 473 hasSpace = current; 474 } 475 if (hasSpace !is null) 476 hasSpace.insert(n); 477 else 478 { 479 BucketNode* newNode = make!BucketNode(allocator); 480 static if (useGC) 481 GC.addRange(newNode, BucketNode.sizeof); 482 newNode.insert(n); 483 newNode.next = root; 484 root = newNode; 485 } 486 return true; 487 } 488 489 bool remove(ItemNode n) 490 { 491 import core.memory : GC; 492 import std.experimental.allocator : dispose; 493 494 BucketNode* current = root; 495 BucketNode* previous; 496 while (current !is null) 497 { 498 immutable removed = current.remove(n); 499 if (removed) 500 { 501 if (current.l == 0) 502 { 503 if (previous !is null) 504 previous.next = current.next; 505 else 506 root = current.next; 507 static if (useGC) 508 GC.removeRange(current); 509 allocator.dispose(current); 510 } 511 return true; 512 } 513 previous = current; 514 current = current.next; 515 } 516 return false; 517 } 518 519 inout(T)* get(T value, Hash hash) inout 520 { 521 for (BucketNode* current = cast(BucketNode*) root; current !is null; current = current.next) 522 { 523 static if (storeHash) 524 { 525 auto v = current.get(ItemNode(hash, value)); 526 } 527 else 528 { 529 auto v = current.get(ItemNode(value)); 530 } 531 532 if (v !is null) 533 return cast(typeof(return)) v; 534 } 535 return null; 536 } 537 538 BucketNode* root; 539 mixin AllocatorState!Allocator; 540 } 541 542 static struct ItemNode 543 { 544 bool opEquals(ref const T v) const 545 { 546 static if (isPointer!T) 547 return *v == *value; 548 else 549 return v == value; 550 } 551 552 bool opEquals(ref const ItemNode other) const 553 { 554 static if (storeHash) 555 if (other.hash != hash) 556 return false; 557 static if (isPointer!T) 558 return *other.value == *value; 559 else 560 return other.value == value; 561 } 562 563 static if (storeHash) 564 Hash hash; 565 ContainerStorageType!T value; 566 567 static if (storeHash) 568 { 569 this(Z)(Hash nh, Z nv) 570 { 571 this.hash = nh; 572 this.value = nv; 573 } 574 } 575 else 576 { 577 this(Z)(Z nv) 578 { 579 this.value = nv; 580 } 581 } 582 } 583 584 mixin AllocatorState!Allocator; 585 Bucket[] buckets; 586 size_t _length; 587 } 588 589 /// 590 version(emsi_containers_unittest) unittest 591 { 592 import std.algorithm : canFind; 593 import std.array : array; 594 import std.range : walkLength; 595 import std.uuid : randomUUID; 596 597 auto s = HashSet!string(16); 598 assert(s.remove("DoesNotExist") == false); 599 assert(!s.contains("nonsense")); 600 assert(s.put("test")); 601 assert(s.contains("test")); 602 assert(!s.put("test")); 603 assert(s.contains("test")); 604 assert(s.length == 1); 605 assert(!s.contains("nothere")); 606 s.put("a"); 607 s.put("b"); 608 s.put("c"); 609 s.put("d"); 610 string[] strings = s[].array; 611 assert(strings.canFind("a")); 612 assert(strings.canFind("b")); 613 assert(strings.canFind("c")); 614 assert(strings.canFind("d")); 615 assert(strings.canFind("test")); 616 assert(*("a" in s) == "a"); 617 assert(*("b" in s) == "b"); 618 assert(*("c" in s) == "c"); 619 assert(*("d" in s) == "d"); 620 assert(*("test" in s) == "test"); 621 assert(strings.length == 5); 622 assert(s.remove("test")); 623 assert(s.length == 4); 624 s.clear(); 625 assert(s.length == 0); 626 assert(s.empty); 627 s.put("abcde"); 628 assert(s.length == 1); 629 foreach (i; 0 .. 10_000) 630 { 631 s.put(randomUUID().toString); 632 } 633 assert(s.length == 10_001); 634 635 // Make sure that there's no range violation slicing an empty set 636 HashSet!int e; 637 foreach (i; e[]) 638 assert(i > 0); 639 640 enum MAGICAL_NUMBER = 600_000; 641 642 HashSet!int f; 643 foreach (i; 0 .. MAGICAL_NUMBER) 644 assert(f.insert(i)); 645 assert(f.length == f[].walkLength); 646 foreach (i; 0 .. MAGICAL_NUMBER) 647 assert(i in f); 648 foreach (i; 0 .. MAGICAL_NUMBER) 649 assert(f.remove(i)); 650 foreach (i; 0 .. MAGICAL_NUMBER) 651 assert(!f.remove(i)); 652 653 HashSet!int g; 654 foreach (i; 0 .. MAGICAL_NUMBER) 655 assert(g.insert(i)); 656 657 static struct AStruct 658 { 659 int a; 660 int b; 661 } 662 663 HashSet!(AStruct*, Mallocator, a => a.a) fred; 664 fred.insert(new AStruct(10, 10)); 665 auto h = new AStruct(10, 10); 666 assert(h in fred); 667 } 668 669 version(emsi_containers_unittest) unittest 670 { 671 static class Foo 672 { 673 string name; 674 } 675 676 hash_t stringToHash(string str) @safe pure nothrow @nogc 677 { 678 hash_t hash = 5381; 679 return hash; 680 } 681 682 hash_t FooToHash(Foo e) pure @safe nothrow @nogc 683 { 684 return stringToHash(e.name); 685 } 686 687 HashSet!(Foo, Mallocator, FooToHash) hs; 688 auto f = new Foo; 689 hs.insert(f); 690 assert(f in hs); 691 auto r = hs[]; 692 } 693 694 version(emsi_containers_unittest) unittest 695 { 696 static class Foo 697 { 698 string name; 699 this(string n) { this.name = n; } 700 bool opEquals(const Foo of) const { 701 if(of !is null) { return this.name == of.name; } 702 else { return false; } 703 } 704 } 705 706 hash_t stringToHash(string str) @safe pure nothrow @nogc 707 { 708 hash_t hash = 5381; 709 return hash; 710 } 711 712 hash_t FooToHash(in Foo e) pure @safe nothrow @nogc 713 { 714 return stringToHash(e.name); 715 } 716 717 string foo = "foo"; 718 HashSet!(const(Foo), Mallocator, FooToHash) hs; 719 const(Foo) f = new const Foo(foo); 720 hs.insert(f); 721 assert(f in hs); 722 auto r = hs[]; 723 assert(!r.empty); 724 auto fro = r.front; 725 assert(fro.name == foo); 726 } 727 728 version(emsi_containers_unittest) unittest 729 { 730 hash_t maxCollision(ulong x) 731 { 732 return 0; 733 } 734 735 HashSet!(ulong, Mallocator, maxCollision) set; 736 auto ipn = set.ITEMS_PER_NODE; // Need this info to trigger this bug, so I made it public 737 assert(ipn > 1); // Won't be able to trigger this bug if there's only 1 item per node 738 739 foreach (i; 0 .. 2 * ipn - 1) 740 set.insert(i); 741 742 assert(0 in set); // OK 743 bool ret = set.insert(0); // 0 should be already in the set 744 assert(!ret); // Fails 745 assert(set.length == 2 * ipn - 1); // Fails 746 } 747 748 version(emsi_containers_unittest) unittest 749 { 750 import std.experimental.allocator.showcase; 751 auto allocator = mmapRegionList(1024); 752 auto set = HashSet!(ulong, typeof(&allocator))(0x1000, &allocator); 753 set.insert(124); 754 }